{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Processing Notebook\n",
    "\n",
    "This notebook demonstrates a pipeline for processing and analyzing customer sales data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Data Loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame({\n",
    "    'customer_id': [101, 102, 103],\n",
    "    'sales': [250.0, 130.0, 400.0],\n",
    "    'region': ['North', 'East', 'West']\n",
    "})\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Data Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_data(df, region_filter=None, normalize=False):\n",
    "    \"\"\"\n",
    "    Process customer sales data.\n",
    "\n",
    "    Args:\n",
    "        df (pd.DataFrame): The input DataFrame containing customer data.\n",
    "        region_filter (str, optional): A region to filter data. Defaults to None.\n",
    "        normalize (bool, optional): Whether to normalize sales data. Defaults to False.\n",
    "\n",
    "    Returns:\n",
    "        pd.DataFrame: Processed data with added total sales.\n",
    "    \"\"\"\n",
    "    if region_filter:\n",
    "        df = df[df['region'] == region_filter]\n",
    "\n",
    "    if normalize:\n",
    "        df['sales'] = (df['sales'] - df['sales'].min()) / (df['sales'].max() - df['sales'].min())\n",
    "\n",
    "    # Add a new column for cumulative sales\n",
    "    df['cumulative_sales'] = df['sales'].cumsum()\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the pipeline over every row: no region filter, no normalization.\n",
    "all_data = process_data(df=data)\n",
    "all_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# North-region rows only, with sales min-max normalized.\n",
    "north_data = process_data(\n",
    "    data,\n",
    "    normalize=True,\n",
    "    region_filter='North',\n",
    ")\n",
    "north_data"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}